load data
USE_DC <- FALSE
text = read.csv('/Users/roso8920/Emotive Computing Dropbox/Rosy Southwell/EML Rosy/EMLLM/info/ia_label_mapping_opt_surprisal.csv')
if (USE_DC){eeg_dir = '/Users/roso8920/Emotive Computing Dropbox/Rosy Southwell/EML Rosy/Data/EEG_processed/unfolded_FRP_reparsed_v6/n400_stats_recomputed/'
} else {eeg_dir = '/Users/roso8920/Emotive Computing Dropbox/Rosy Southwell/EML Rosy/Data/EEG_processed/unfolded_FRP_reparsed_v6/n400_stats_nodc/'
}
file_pat = '_reading_N400_stats\\.csv$'
files=list.files(path = eeg_dir, pattern= file_pat)
Loop over participants
all<-c()
i<-0
for (f in files){
i=i+1
df <-read.csv(paste0(eeg_dir,f))
p <- strsplit(f,'_')
pID <- paste(p[[1]][1],p[[1]][2],sep='_')
df$pID <- pID
# df<- df %>% dplyr::select(c("pID", "type","latency","fixDur","task","identifier" , "page_fixation_ix","IA_ID","n400_magnitude_CPz","n400_latency_CPz" ))
df<- df %>% filter(task=='reading') %>% droplevels()
all[[i]] <- df
}
df <- do.call(rbind, all)
Merge with text info for surprisal values
delta_wf = log(min(text$word_freq[text$word_freq>0]))-1 # use the minimum word frequency for any missing word frerquencies so we can take log
text <- text %>% rename(surprisalText=opt.125m_surprisal_wholetext, surprisalPage = opt.125m_surprisal_page)
text<- text %>%mutate(logSurprisalText = log(surprisalText),
logSurprisalPage = log(surprisalPage),
logWordFreq = ifelse(word_freq>0,log(word_freq),delta_wf)
)
df <- df %>% merge(text, on=c("IA_ID","identifier")) %>% rename(fixDur=duration)
# drop rows with no IA
df <- drop_na(df, "IA_LABEL")
# drop rows with punc
df <- df %>% filter(! IA_LABEL %in% c('.',',','-','--',';',':',"'",'?','!'))
feats <- grep("n400|p1|n1|fixDur",colnames(df), value=T)
feats_eeg <- grep("n400|p1|n1",colnames(df), value=T)
basic checks on text variables
ggplot(text,aes(x=relative_word_position, y=logSurprisalText)) +
geom_point(alpha=0.5, size=0.5)+stat_cor(method="pearson", size=4, r.accuracy=0.01, p.accuracy=0.001)

examine
df_sub <- df %>% group_by(pID) %>% summarise_if(is.numeric,mean, na.rm=T)
plots
df_long <- df %>% pivot_longer(all_of(c(feats,'logFixDur')), names_to="feature",values_to="value")
df_long <- df_long %>% mutate(channel=str_match(feature, 'n400_\\w*_([[:alnum:]]{2,5})$')[,2],
feature_type=str_match(feature, '(n400_\\w*)_[[:alnum:]]{2,5}$')[,2])
# compute lower and upper whiskers for each group
ylims <- df_long %>%
group_by(feature_type) %>%
summarise(Q1 = quantile(value, 1/100), Q3 = quantile(value, 99/100)) %>%
ungroup()
p1<-ggplot(df_long %>% filter(feature_type=='n400_mean')) +
geom_density(aes(x=value, fill=channel), alpha=0.3) + xlim(-1000,1000) + ggtitle('n400_mean')
p2<-ggplot(df_long %>% filter(feature_type=='n400_min_magnitude')) +
geom_density(aes(x=value, fill=channel), alpha=0.3) + xlim(-1000,1000) + ggtitle('n400_min')
p3<-ggplot(df_long %>% filter(feature_type=='n400_max_magnitude')) +
geom_density(aes(x=value, fill=channel), alpha=0.3) + xlim(-1000,1000) + ggtitle('n400_max')
grid.arrange(p1,p2,p3)

ggplot(df)+
geom_density(aes(x=fixDur))+
scale_x_log10()

ggplot(df)+
geom_density(aes(x=surprisalText))+
scale_x_log10()

ggplot(df_long)+
geom_violin(aes(x=value,y=factor(channel), fill=factor(channel),outliers = FALSE), alpha=0.3)+
facet_wrap(~feature_type, scales="free")

ggplot(df_long %>% filter(feature_type =='n400_mean'))+
geom_point(aes(x=surprisalText,y=value, color=factor(channel)), alpha=0.05, size=0.5)+
facet_wrap(~channel, scales="free")

repeat some plots after transformatiions
ggplot(df,aes(x=logSurprisalText, y=logFixDur)) +
geom_point(size=1, alpha=0.01)+stat_cor(method="pearson")

ggplot(df_long %>% filter(feature_type =='n400_mean'),aes(x=logSurprisalText,y=value, color=factor(channel)))+
geom_point(alpha=0.01, size=0.5)+stat_cor(method="pearson", size=2, r.accuracy=0.01, p.accuracy=0.001, label.x=-7.5, label.y=1000)+
facet_wrap(~channel, scales="free")

ggplot(df_long %>% filter(feature_type =='n400_max_magnitude'),aes(x=logSurprisalText,y=value, color=factor(channel)))+
geom_point(alpha=0.01, size=0.5)+stat_cor(method="pearson", size=2, r.accuracy=0.01, p.accuracy=0.001, label.x=-7.5, label.y=1000)+
facet_wrap(~channel, scales="free")

ggplot(df_long %>% filter(feature_type =='n400_min_magnitude'),aes(x=logSurprisalText,y=value, color=factor(channel)))+
geom_point(alpha=0.01, size=0.5)+stat_cor(method="pearson", size=2, r.accuracy=0.01, p.accuracy=0.001, label.x=-7.5, label.y=1000)+
facet_wrap(~channel, scales="free")

LME stats
predict EEG from LOG surprisal and fixDur
m.l.m <- lmer(n400_mean_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df)
m.l.min <- lmer(n400_max_magnitude_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df)
m.l.max <- lmer(n400_min_magnitude_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df)
m.l.zc <- lmer(n400_zero_crossings_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df)
m.l.p1 <-lmer(p1_mean ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df)
m.l.n1 <- lmer(n1_mean ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df)
tab_model(m.l.m, m.l.p1, m.l.n1, show.ci=F)
|
|
n 400 mean C Pz
|
p 1 mean
|
n 1 mean
|
|
Predictors
|
Estimates
|
p
|
Estimates
|
p
|
Estimates
|
p
|
|
(Intercept)
|
17.14
|
<0.001
|
-54.07
|
<0.001
|
-54.93
|
<0.001
|
|
logFixDur
|
-2.85
|
<0.001
|
17.61
|
<0.001
|
9.67
|
<0.001
|
|
logSurprisalText
|
-0.91
|
<0.001
|
-0.88
|
0.019
|
-1.21
|
0.001
|
|
Random Effects
|
|
σ2
|
10397.76
|
25848.40
|
25986.22
|
|
τ00
|
98.97 pID
|
469.06 pID
|
152.63 pID
|
|
|
26.39 identifier
|
72.74 identifier
|
53.34 identifier
|
|
ICC
|
0.01
|
0.02
|
0.01
|
|
N
|
50 identifier
|
50 identifier
|
50 identifier
|
|
|
91 pID
|
91 pID
|
91 pID
|
|
Observations
|
104272
|
104272
|
104272
|
|
Marginal R2 / Conditional R2
|
0.000 / 0.012
|
0.003 / 0.023
|
0.001 / 0.009
|
predict fixDur from surprisal
m.d <- lmer(fixDur ~ (1|identifier) + (1|pID) + logSurprisalText, data=df)
tab_model(m.d)
|
|
fix Dur
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
208.40
|
202.58 – 214.22
|
<0.001
|
|
logSurprisalText
|
1.81
|
1.38 – 2.24
|
<0.001
|
|
Random Effects
|
|
σ2
|
8839.40
|
|
τ00 pID
|
729.69
|
|
τ00 identifier
|
31.32
|
|
ICC
|
0.08
|
|
N identifier
|
50
|
|
N pID
|
91
|
|
Observations
|
104272
|
|
Marginal R2 / Conditional R2
|
0.001 / 0.080
|
predict fixation fixDur from EEG features
m.eye <- lmer(fixDur ~ (1|identifier) + (1|pID) + n400_mean_CPz, data=df)
tab_model(m.eye)
|
|
fix Dur
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
210.22
|
204.41 – 216.02
|
<0.001
|
|
n400 mean CPz
|
-0.01
|
-0.01 – -0.00
|
0.003
|
|
Random Effects
|
|
σ2
|
8844.38
|
|
τ00 pID
|
729.05
|
|
τ00 identifier
|
32.34
|
|
ICC
|
0.08
|
|
N identifier
|
50
|
|
N pID
|
91
|
|
Observations
|
104272
|
|
Marginal R2 / Conditional R2
|
0.000 / 0.079
|
predict EEG and surprisal from previous IA surprisal
mp.m <- lmer(n400_mean_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText + prev_logSurprisalText, data=df)
tab_model(mp.m)
|
|
n 400 mean C Pz
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
16.72
|
9.04 – 24.40
|
<0.001
|
|
logFixDur
|
-2.71
|
-4.08 – -1.33
|
<0.001
|
|
logSurprisalText
|
-0.87
|
-1.34 – -0.40
|
<0.001
|
|
prev logSurprisalText
|
-0.52
|
-0.99 – -0.06
|
0.028
|
|
Random Effects
|
|
σ2
|
10336.85
|
|
τ00 pID
|
100.21
|
|
τ00 identifier
|
26.79
|
|
ICC
|
0.01
|
|
N identifier
|
50
|
|
N pID
|
91
|
|
Observations
|
102838
|
|
Marginal R2 / Conditional R2
|
0.000 / 0.013
|
mp.s <- lm( logSurprisalText ~ prev_logSurprisalText, data=df)
tab_model(mp.s)
|
|
log Surprisal Text
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
0.89
|
0.88 – 0.90
|
<0.001
|
|
prev logSurprisalText
|
0.15
|
0.14 – 0.15
|
<0.001
|
|
Observations
|
102838
|
|
R2 / R2 adjusted
|
0.022 / 0.022
|
model including bool for refixation
mr.m <- lmer(n400_mean_CPz ~ (1|identifier) + (1|pID) + logFixDur + refixation*logSurprisalText, data=df)
tab_model(mr.m)
|
|
n 400 mean C Pz
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
16.93
|
9.23 – 24.62
|
<0.001
|
|
logFixDur
|
-2.86
|
-4.22 – -1.49
|
<0.001
|
|
refixation [1]
|
0.54
|
-1.08 – 2.15
|
0.515
|
|
logSurprisalText
|
-0.49
|
-1.24 – 0.25
|
0.192
|
refixation [1] * logSurprisalText
|
-0.68
|
-1.63 – 0.27
|
0.160
|
|
Random Effects
|
|
σ2
|
10397.75
|
|
τ00 pID
|
99.06
|
|
τ00 identifier
|
26.38
|
|
ICC
|
0.01
|
|
N identifier
|
50
|
|
N pID
|
91
|
|
Observations
|
104272
|
|
Marginal R2 / Conditional R2
|
0.000 / 0.012
|
Anova(mr.m)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: n400_mean_CPz
## Chisq Df Pr(>Chisq)
## logFixDur 16.8361 1 0.00004075 ***
## refixation 0.0206 1 0.8859451
## logSurprisalText 14.5204 1 0.0001387 ***
## refixation:logSurprisalText 1.9730 1 0.1601283
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot_model(mr.m,type = "emm", terms=c("logSurprisalText", "refixation"))

tb<-tableone::CreateTableOne(data = df, vars = c("n400_mean_CPz", "logFixDur"), strata = 'refixation', test=T)
knitr::kable(print(tb))
## Stratified by refixation
## 0 1 p test
## n 36498 67774
## n400_mean_CPz (mean (SD)) 1.15 (105.74) 0.98 (100.75) 0.795
## logFixDur (mean (SD)) 5.27 (0.44) 5.24 (0.49) <0.001
| n |
36498 |
67774 |
|
|
| n400_mean_CPz (mean (SD)) |
1.15 (105.74) |
0.98 (100.75) |
0.795 |
|
| logFixDur (mean (SD)) |
5.27 (0.44) |
5.24 (0.49) |
<0.001 |
|
model only firstpass fixations
m.fp.m <- lmer(n400_mean_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df %>% filter(refixation_==1))
# m.fp.min <- lmer(n400_max_magnitude_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df %>% filter(refixation_==1))
# m.fp.max <- lmer(n400_min_magnitude_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df %>% filter(refixation_==1))
# m.fp.zc <- lmer(n400_zero_crossings_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df %>% filter(refixation_==1))
# tab_model(m.fp.m, m.fp.min, m.fp.max, m.fp.zc, show.ci=F)
m.fp.p1 <-lmer(p1_mean ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df%>% filter(refixation_==1))
m.fp.n1 <- lmer(n1_mean ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df%>% filter(refixation_==1))
tab_model(m.fp.m, m.fp.p1, m.fp.n1, show.ci=F)
|
|
n 400 mean C Pz
|
p 1 mean
|
n 1 mean
|
|
Predictors
|
Estimates
|
p
|
Estimates
|
p
|
Estimates
|
p
|
|
(Intercept)
|
11.96
|
0.007
|
-54.47
|
<0.001
|
-54.40
|
<0.001
|
|
logFixDur
|
-1.88
|
0.022
|
17.47
|
<0.001
|
9.52
|
<0.001
|
|
logSurprisalText
|
-1.13
|
<0.001
|
-1.16
|
0.014
|
-1.31
|
0.006
|
|
Random Effects
|
|
σ2
|
10073.81
|
24711.30
|
24924.04
|
|
τ00
|
70.33 pID
|
492.04 pID
|
177.96 pID
|
|
|
21.42 identifier
|
62.87 identifier
|
45.67 identifier
|
|
ICC
|
0.01
|
0.02
|
0.01
|
|
N
|
50 identifier
|
50 identifier
|
50 identifier
|
|
|
91 pID
|
91 pID
|
91 pID
|
|
Observations
|
67774
|
67774
|
67774
|
|
Marginal R2 / Conditional R2
|
0.000 / 0.009
|
0.003 / 0.025
|
0.001 / 0.010
|
model including other lexical variables & first pass fixations only
m.lx.m <- lmer(n400_mean_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText + logWordFreq + relative_word_position, data=df %>% filter(refixation_==1))
# m.fp.min <- lmer(n400_max_magnitude_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df %>% filter(refixation_==1))
# m.fp.max <- lmer(n400_min_magnitude_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df %>% filter(refixation_==1))
# m.fp.zc <- lmer(n400_zero_crossings_CPz ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText, data=df %>% filter(refixation_==1))
# tab_model(m.fp.m, m.fp.min, m.fp.max, m.fp.zc, show.ci=F)
m.lx.p1 <-lmer(p1_mean ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText+ logWordFreq + relative_word_position, data=df%>% filter(refixation_==1))
m.lx.n1 <- lmer(n1_mean ~ (1|identifier) + (1|pID) + logFixDur + logSurprisalText+ logWordFreq + relative_word_position, data=df%>% filter(refixation_==1))
tab_model(m.lx.m, m.lx.p1, m.lx.n1, show.ci=F)
|
|
n 400 mean C Pz
|
p 1 mean
|
n 1 mean
|
|
Predictors
|
Estimates
|
p
|
Estimates
|
p
|
Estimates
|
p
|
|
(Intercept)
|
7.50
|
0.104
|
-62.90
|
<0.001
|
-57.07
|
<0.001
|
|
logFixDur
|
-1.95
|
0.018
|
17.39
|
<0.001
|
9.56
|
<0.001
|
|
logSurprisalText
|
-1.31
|
<0.001
|
-1.28
|
0.010
|
-0.97
|
0.051
|
|
logWordFreq
|
-0.33
|
0.013
|
-0.38
|
0.072
|
0.30
|
0.150
|
|
relative word position
|
4.04
|
0.004
|
11.77
|
<0.001
|
10.66
|
<0.001
|
|
Random Effects
|
|
σ2
|
10071.71
|
24699.86
|
24916.07
|
|
τ00
|
70.24 pID
|
492.01 pID
|
177.87 pID
|
|
|
21.59 identifier
|
62.13 identifier
|
45.21 identifier
|
|
ICC
|
0.01
|
0.02
|
0.01
|
|
N
|
50 identifier
|
50 identifier
|
50 identifier
|
|
|
91 pID
|
91 pID
|
91 pID
|
|
Observations
|
67774
|
67774
|
67774
|
|
Marginal R2 / Conditional R2
|
0.001 / 0.010
|
0.003 / 0.025
|
0.001 / 0.010
|
predict EEG from surprisal and fixDur with random slope of surprisal for participant
m.rs.m <- lmer(n400_mean_CPz ~ (1+logSurprisalText|pID) + (1|identifier) + logSurprisalText + logFixDur, data=df)
m.rs.min <- lmer(n400_max_magnitude_CPz ~ (1+logSurprisalText|pID) + (1|identifier) + logFixDur + logSurprisalText, data=df)
m.rs.max <- lmer(n400_min_magnitude_CPz ~(1+logSurprisalText|pID) + (1|identifier) + logFixDur + logSurprisalText, data=df)
tab_model(m.rs.m, m.rs.min, m.rs.max, show.ci=F)
|
|
n 400 mean C Pz
|
n 400 max magnitude C Pz
|
n 400 min magnitude C Pz
|
|
Predictors
|
Estimates
|
p
|
Estimates
|
p
|
Estimates
|
p
|
|
(Intercept)
|
17.12
|
<0.001
|
95.94
|
<0.001
|
-64.66
|
<0.001
|
|
logSurprisalText
|
-0.93
|
0.003
|
-1.01
|
0.002
|
-0.74
|
0.019
|
|
logFixDur
|
-2.84
|
<0.001
|
-3.64
|
<0.001
|
-1.84
|
0.015
|
|
Random Effects
|
|
σ2
|
10391.52
|
10709.01
|
12203.94
|
|
τ00
|
112.29 pID
|
907.49 pID
|
709.26 pID
|
|
|
26.68 identifier
|
29.53 identifier
|
31.44 identifier
|
|
τ11
|
3.31 pID.logSurprisalText
|
4.00 pID.logSurprisalText
|
2.87 pID.logSurprisalText
|
|
ρ01
|
-0.43 pID
|
-0.04 pID
|
-0.06 pID
|
|
ICC
|
0.01
|
0.08
|
0.06
|
|
N
|
91 pID
|
91 pID
|
91 pID
|
|
|
50 identifier
|
50 identifier
|
50 identifier
|
|
Observations
|
104272
|
104272
|
104272
|
|
Marginal R2 / Conditional R2
|
0.000 / 0.013
|
0.000 / 0.081
|
0.000 / 0.058
|
dotplot(ranef(m.rs.m))
## $pID

##
## $identifier

Modeling with comprehension/MW
Load Behavioural data
df_comp <- read.csv('/Users/roso8920/Emotive Computing Dropbox/Rosy Southwell/EyeMindLink/Processed/Behaviour/EML1_page_level.csv')
df_comp <- df_comp %>% rename(pID=ParticipantID) %>%
mutate(Page=PageNum-1,
identifier=paste0(Text, Page))
df <- merge(df, df_comp, on=c("pID","identifier"))
df_mw <- df %>% drop_na(MW) %>% mutate(MW=as.factor(MW))
model FIXATION LEVEL N400 ~ surprisal*MW (caveat - MW is at page level)
m.mw <- lmer(n400_mean_CPz ~ (1+logSurprisalText|pID) + (1|identifier) + logSurprisalText*MW , data=df_mw)
Anova(m.mw)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: n400_mean_CPz
## Chisq Df Pr(>Chisq)
## logSurprisalText 0.5168 1 0.4722
## MW 0.5501 1 0.4583
## logSurprisalText:MW 1.2574 1 0.2621
tab_model(m.mw)
|
|
n 400 mean C Pz
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
3.71
|
-1.92 – 9.34
|
0.196
|
|
logSurprisalText
|
0.06
|
-1.29 – 1.40
|
0.934
|
|
MW [1]
|
2.99
|
-1.81 – 7.79
|
0.222
|
|
logSurprisalText * MW [1]
|
-1.22
|
-3.36 – 0.91
|
0.262
|
|
Random Effects
|
|
σ2
|
9889.06
|
|
τ00 pID
|
333.43
|
|
τ00 identifier
|
55.66
|
|
τ11 pID.logSurprisalText
|
2.97
|
|
ρ01 pID
|
-0.85
|
|
ICC
|
0.03
|
|
N pID
|
91
|
|
N identifier
|
20
|
|
Observations
|
22322
|
|
Marginal R2 / Conditional R2
|
0.000 / 0.033
|
plot_model(m.mw,type = "emm", terms=c("logSurprisalText", "MW"))

plot_model(m.mw,type = "emm", terms=c("logFixDur", "MW"))
## `logFixDur` was not found in model terms. Maybe misspelled?
## Can't compute estimated marginal means, 'emmeans::emmeans()' returned an error.
##
## Reason: No variable named logFixDur in the reference grid
## You may try 'ggpredict()' or 'ggeffect()'.
## NULL
emmeans(m.mw, pairwise ~ MW, p.adjust = "fdr")
## $emmeans
## MW emmean SE df asymp.LCL asymp.UCL
## 0 3.77 2.68 Inf -1.476 9.01
## 1 5.47 2.90 Inf -0.223 11.16
##
## Degrees-of-freedom method: asymptotic
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df z.ratio p.value
## 0 - 1 -1.7 2.08 Inf -0.814 0.4155
##
## Degrees-of-freedom method: asymptotic
bobby’s mode: MW label at page level, surprosal and n400 at fixation level
# mB <- allFit(lmer(n400_mean_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw))
mB <- lmer(n400_mean_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw, control=lmerControl(optimizer='Nelder_Mead'))
summary(mB)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: n400_mean_CPz ~ 1 + MW * logSurprisalText + (MW * logSurprisalText |
## pID) + (logSurprisalText | pID:identifier)
## Data: df_mw
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## REML criterion at convergence: 268460.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -7.8943 -0.5463 0.0153 0.5626 5.7741
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## pID:identifier (Intercept) 1654.44 40.675
## logSurprisalText 18.15 4.260 -1.00
## pID (Intercept) 579.29 24.068
## MW1 11698.49 108.160 0.02
## logSurprisalText 20.62 4.541 -0.19 -0.86
## MW1:logSurprisalText 389.91 19.746 -0.26 -0.95 0.76
## Residual 9416.73 97.040
## Number of obs: 22322, groups: pID:identifier, 301; pID, 91
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 4.7955 4.3513 64.0041 1.102 0.275
## MW1 4.6060 15.1917 461.7923 0.303 0.762
## logSurprisalText 0.2108 0.9029 44.2160 0.234 0.816
## MW1:logSurprisalText -1.8703 3.0574 18.9739 -0.612 0.548
##
## Correlation of Fixed Effects:
## (Intr) MW1 lgSrpT
## MW1 -0.209
## lgSrprslTxt -0.455 -0.144
## MW1:lgSrprT 0.046 -0.909 -0.021
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## unable to evaluate scaled gradient
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
tab_model(mB)
|
|
n 400 mean C Pz
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
4.80
|
-3.73 – 13.32
|
0.270
|
|
MW [1]
|
4.61
|
-25.17 – 34.38
|
0.762
|
|
logSurprisalText
|
0.21
|
-1.56 – 1.98
|
0.815
|
|
MW [1] * logSurprisalText
|
-1.87
|
-7.86 – 4.12
|
0.541
|
|
Random Effects
|
|
σ2
|
9416.73
|
|
τ00 pID:identifier
|
1654.44
|
|
τ00 pID
|
579.29
|
|
τ11 pID:identifier.logSurprisalText
|
18.15
|
|
τ11 pID.MW1
|
11698.49
|
|
τ11 pID.logSurprisalText
|
20.62
|
|
τ11 pID.MW1:logSurprisalText
|
389.91
|
|
ρ01 pID:identifier
|
-1.00
|
|
ρ01 pID.MW1
|
0.02
|
|
ρ01 pID.logSurprisalText
|
-0.19
|
|
ρ01 pID.MW1:logSurprisalText
|
-0.26
|
|
ICC
|
0.34
|
|
N pID
|
91
|
|
N identifier
|
20
|
|
Observations
|
22322
|
|
Marginal R2 / Conditional R2
|
0.000 / 0.338
|
min n400
# mB <- allFit(lmer(n400_mean_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw))
mB.min <- lmer(n400_min_magnitude_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw, control=lmerControl(optimizer='Nelder_Mead'))
summary(mB.min)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## n400_min_magnitude_CPz ~ 1 + MW * logSurprisalText + (MW * logSurprisalText |
## pID) + (logSurprisalText | pID:identifier)
## Data: df_mw
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## REML criterion at convergence: 272324.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.8703 -0.5359 0.0344 0.5788 5.1761
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## pID:identifier (Intercept) 1897.2779 43.558
## logSurprisalText 14.9662 3.869 -0.74
## pID (Intercept) 1288.5762 35.897
## MW1 104.7103 10.233 -0.55
## logSurprisalText 0.5579 0.747 -0.64 0.84
## MW1:logSurprisalText 35036.4903 187.180 0.38 -0.49 -0.44
## Residual 11069.8068 105.213
## Number of obs: 22322, groups: pID:identifier, 301; pID, 91
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -75.105 5.380 98.438 -13.959 <0.0000000000000002
## MW1 8.644 6.838 125.248 1.264 0.209
## logSurprisalText 0.130 0.782 138.419 0.166 0.868
## MW1:logSurprisalText 2.405 24.482 4951.097 0.098 0.922
##
## (Intercept) ***
## MW1
## logSurprisalText
## MW1:logSurprisalText
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) MW1 lgSrpT
## MW1 -0.513
## lgSrprslTxt -0.354 0.253
## MW1:lgSrprT 0.196 -0.050 -0.063
## optimizer (Nelder_Mead) convergence code: 4 (failure to converge in 10000 evaluations)
## unable to evaluate scaled gradient
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
## failure to converge in 10000 evaluations
tab_model(mB.min)
|
|
n 400 min magnitude C Pz
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
-75.10
|
-85.65 – -64.56
|
<0.001
|
|
MW [1]
|
8.64
|
-4.76 – 22.05
|
0.206
|
|
logSurprisalText
|
0.13
|
-1.40 – 1.66
|
0.868
|
|
MW [1] * logSurprisalText
|
2.41
|
-45.58 – 50.39
|
0.922
|
|
Random Effects
|
|
σ2
|
11069.81
|
|
τ00 pID:identifier
|
1897.28
|
|
τ00 pID
|
1288.58
|
|
τ11 pID:identifier.logSurprisalText
|
14.97
|
|
τ11 pID.MW1
|
104.71
|
|
τ11 pID.logSurprisalText
|
0.56
|
|
τ11 pID.MW1:logSurprisalText
|
35036.49
|
|
ρ01 pID:identifier
|
-0.74
|
|
ρ01 pID.MW1
|
-0.55
|
|
ρ01 pID.logSurprisalText
|
-0.64
|
|
ρ01 pID.MW1:logSurprisalText
|
0.38
|
|
ICC
|
0.79
|
|
N pID
|
91
|
|
N identifier
|
20
|
|
Observations
|
22322
|
|
Marginal R2 / Conditional R2
|
0.001 / 0.790
|
max n400
# mB <- allFit(lmer(n400_mean_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw))
mB.max <- lmer(n400_max_magnitude_CPz ~ 1 + MW*logSurprisalText + (MW*logSurprisalText|pID) + (logSurprisalText|pID:identifier), data=df_mw, control=lmerControl(optimizer='Nelder_Mead'))
summary(mB.max)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## n400_max_magnitude_CPz ~ 1 + MW * logSurprisalText + (MW * logSurprisalText |
## pID) + (logSurprisalText | pID:identifier)
## Data: df_mw
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## REML criterion at convergence: 269299.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -8.6498 -0.5492 0.0005 0.5668 5.7653
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## pID:identifier (Intercept) 1720.701 41.481
## logSurprisalText 0.996 0.998 -1.00
## pID (Intercept) 1540.422 39.248
## MW1 325.368 18.038 -1.00
## logSurprisalText 11.519 3.394 0.36 -0.38
## MW1:logSurprisalText 694.098 26.346 0.40 -0.39 -0.21
## Residual 9745.094 98.717
## Number of obs: 22322, groups: pID:identifier, 301; pID, 91
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 79.8938 5.5434 49.4935 14.412 <0.0000000000000002
## MW1 4.2239 6.4477 153.9680 0.655 0.513
## logSurprisalText 0.3479 0.7790 67.8454 0.447 0.657
## MW1:logSurprisalText -0.5603 3.6415 365.6702 -0.154 0.878
##
## (Intercept) ***
## MW1
## logSurprisalText
## MW1:logSurprisalText
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) MW1 lgSrpT
## MW1 -0.641
## lgSrprslTxt -0.031 0.069
## MW1:lgSrprT 0.228 -0.119 -0.226
## optimizer (Nelder_Mead) convergence code: 4 (failure to converge in 10000 evaluations)
## unable to evaluate scaled gradient
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
## failure to converge in 10000 evaluations
tab_model(mB.max)
|
|
n 400 max magnitude C Pz
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
79.89
|
69.03 – 90.76
|
<0.001
|
|
MW [1]
|
4.22
|
-8.41 – 16.86
|
0.512
|
|
logSurprisalText
|
0.35
|
-1.18 – 1.87
|
0.655
|
|
MW [1] * logSurprisalText
|
-0.56
|
-7.70 – 6.58
|
0.878
|
|
Random Effects
|
|
σ2
|
9745.09
|
|
τ00 pID:identifier
|
1720.70
|
|
τ00 pID
|
1540.42
|
|
τ11 pID:identifier.logSurprisalText
|
1.00
|
|
τ11 pID.MW1
|
325.37
|
|
τ11 pID.logSurprisalText
|
11.52
|
|
τ11 pID.MW1:logSurprisalText
|
694.10
|
|
ρ01 pID:identifier
|
-1.00
|
|
ρ01 pID.MW1
|
-1.00
|
|
ρ01 pID.logSurprisalText
|
0.36
|
|
ρ01 pID.MW1:logSurprisalText
|
0.40
|
|
ICC
|
0.28
|
|
N pID
|
91
|
|
N identifier
|
20
|
|
Observations
|
22322
|
|
Marginal R2 / Conditional R2
|
0.000 / 0.279
|
Eye-mind-linkage metric at page level
note atanh transform for correlation when it is to be used in model
eml_page <- df %>% group_by(pID, identifier) %>% summarise(
count=n(),
cor_n400meanCPz_logSurprisalText = atanh(cor(logSurprisalText, n400_mean_CPz)),
cor_n400minCPz_logSurprisalText = atanh(cor(logSurprisalText, n400_min_magnitude_CPz)),
cor_n400maxCPz_logSurprisalText = atanh(cor(logSurprisalText, n400_max_magnitude_CPz)),
cor_n400meanCPz_logWordFreq = atanh(cor(logWordFreq, n400_mean_CPz)),
cor_P1_logSurprisalText = atanh(cor(logSurprisalText, p1_mean)),
cor_N1_logSurprisalText = atanh(cor(logSurprisalText, n1_mean)),
cor_P1_logWordFreq = atanh(cor(logWordFreq, p1_mean)),
cor_N1_logWordFreq = atanh(cor(logWordFreq, n1_mean)),
cor_logFixDur_logSurprisalText = atanh(cor(logSurprisalText, logFixDur)),
meanLogSurprisalText=mean(logSurprisalText),
meanP1=mean(p1_mean),
meanN1=mean(n1_mean),
meanN400=mean(n400_mean_CPz)
) %>% drop_na() %>%
filter(count>2) # remove instances with single or two fixation on page as correlation coeff will be 1 or -1
eml_page[Reduce(`&`, lapply(eml_page, is.finite)),]
## # A tibble: 0 × 16
## # Groups: pID [0]
## # … with 16 variables: pID <chr>, identifier <fct>, count <int>,
## # cor_n400meanCPz_logSurprisalText <dbl>,
## # cor_n400minCPz_logSurprisalText <dbl>,
## # cor_n400maxCPz_logSurprisalText <dbl>, cor_n400meanCPz_logWordFreq <dbl>,
## # cor_P1_logSurprisalText <dbl>, cor_N1_logSurprisalText <dbl>,
## # cor_P1_logWordFreq <dbl>, cor_N1_logWordFreq <dbl>,
## # cor_logFixDur_logSurprisalText <dbl>, meanLogSurprisalText <dbl>, …
df_page <- left_join(eml_page, df_comp, on=c('pID', 'identifer'))
df_page$MW = as.factor(df_page$MW)
# plot the new metrics
p1<-ggplot(df_page %>% drop_na(MW))+
geom_density( aes(x=cor_n400meanCPz_logSurprisalText, color=MW))
p2<-ggplot(df_page %>% drop_na(MW))+
geom_density( aes(x=cor_logFixDur_logSurprisalText, color=MW))
p3<-ggplot(df_page %>% drop_na(MW))+
geom_density( aes(x=cor_P1_logSurprisalText , color=MW))
p4<-ggplot(df_page %>% drop_na(MW))+
geom_density( aes(x=cor_N1_logSurprisalText , color=MW))
p5<-ggplot(df_page %>% drop_na(MW))+
geom_density( aes(x=cor_P1_logWordFreq , color=MW))
p6<-ggplot(df_page %>% drop_na(MW))+
geom_density( aes(x=cor_N1_logWordFreq , color=MW))
grid.arrange(p1,p2,p3,p4, p5,p6)

p1<-ggplot(df_page %>% drop_na(MW))+
geom_density( aes(x=meanP1, color=MW))
p2<-ggplot(df_page %>% drop_na(MW))+
geom_density( aes(x=meanN1, color=MW))
p3<-ggplot(df_page %>% drop_na(MW))+
geom_density( aes(x=meanN400 , color=MW))
grid.arrange(p1,p2,p3)

# plot by participantID
# ggplot(df_page, aes(x=cor_n400meanCPz_logSurprisalText, y=pID)) +
# geom_boxplot( horizontal = TRUE, outlier.colour="red",
# outlier.fill="red",
# outlier.size=0.2)
binomial regression: predict page level MW from page level correlations with surprisal
# m.mw.mean <- glmer(MW ~ cor_n400meanCPz_logSurprisalText + (1|identifier) + (1+cor_n400meanCPz_logSurprisalText|pID), data=df_page %>% drop_na(MW), family = "binomial")
# plot_model(m.mw.mean,type = "emm", terms=c("cor_n400meanCPz_logSurprisalText"))
#
# m.mw.min <- glmer(MW ~ cor_n400minCPz_logSurprisalText + (1|identifier) + (1+cor_n400minCPz_logSurprisalText|pID), data=df_page %>% drop_na(MW), family = "binomial")
# plot_model(m.mw.min,type = "emm", terms=c("cor_n400minCPz_logSurprisalText"))
#
# m.mw.max <- glmer(MW ~ cor_n400maxCPz_logSurprisalText + (1|identifier) + (1+cor_n400maxCPz_logSurprisalText|pID), data=df_page %>% drop_na(MW), family = "binomial")
# plot_model(m.mw.max,type = "emm", terms=c("cor_n400maxCPz_logSurprisalText"))
m.mw.all <- glmer(MW ~
cor_P1_logWordFreq + cor_P1_logSurprisalText + cor_N1_logWordFreq + cor_N1_logSurprisalText +
cor_n400meanCPz_logWordFreq + cor_n400meanCPz_logSurprisalText +
(1|identifier) + (1|pID), data=df_page %>% drop_na(MW), family = "binomial")
m.mw.n400 <- glmer(MW ~
# cor_P1_logWordFreq + cor_P1_logSurprisalText + cor_N1_logWordFreq + cor_N1_logSurprisalText +
cor_n400meanCPz_logWordFreq + cor_n400meanCPz_logSurprisalText +
(1|identifier) + (1|pID), data=df_page %>% drop_na(MW), family = "binomial")
m.mw.main <- glmer(MW ~
# cor_P1_logWordFreq + cor_P1_logSurprisalText + cor_N1_logWordFreq + cor_N1_logSurprisalText +
meanP1 + meanN1 + meanN400+
(1|identifier) + (1|pID), data=df_page %>% drop_na(MW), family = "binomial")
# tab_model(m.mw.mean, m.mw.min, m.mw.max)
# tab_model(m.mw.mean)
tab_model(m.mw.all)
|
|
MW
|
|
Predictors
|
Odds Ratios
|
CI
|
p
|
|
(Intercept)
|
0.45
|
0.24 – 0.83
|
0.012
|
|
cor P1 logWordFreq
|
0.26
|
0.02 – 4.52
|
0.359
|
|
cor P1 logSurprisalText
|
0.12
|
0.01 – 1.70
|
0.117
|
|
cor N1 logWordFreq
|
1.00
|
0.09 – 11.56
|
0.997
|
|
cor N1 logSurprisalText
|
1.69
|
0.19 – 14.63
|
0.635
|
cor n400meanCPz logWordFreq
|
1.13
|
0.22 – 5.86
|
0.888
|
cor n400meanCPz logSurprisalText
|
0.24
|
0.05 – 1.15
|
0.075
|
|
Random Effects
|
|
σ2
|
3.29
|
|
τ00 pID
|
1.46
|
|
τ00 identifier
|
0.95
|
|
ICC
|
0.42
|
|
N identifier
|
20
|
|
N pID
|
91
|
|
Observations
|
287
|
|
Marginal R2 / Conditional R2
|
0.066 / 0.461
|
tab_model(m.mw.n400)
|
|
MW
|
|
Predictors
|
Odds Ratios
|
CI
|
p
|
|
(Intercept)
|
0.47
|
0.27 – 0.84
|
0.010
|
cor n400meanCPz logWordFreq
|
1.51
|
0.43 – 5.24
|
0.520
|
cor n400meanCPz logSurprisalText
|
0.35
|
0.09 – 1.29
|
0.114
|
|
Random Effects
|
|
σ2
|
3.29
|
|
τ00 pID
|
1.28
|
|
τ00 identifier
|
0.74
|
|
ICC
|
0.38
|
|
N identifier
|
20
|
|
N pID
|
91
|
|
Observations
|
287
|
|
Marginal R2 / Conditional R2
|
0.040 / 0.405
|
tab_model(m.mw.main)
|
|
MW
|
|
Predictors
|
Odds Ratios
|
CI
|
p
|
|
(Intercept)
|
0.64
|
0.64 – 0.64
|
<0.001
|
|
meanP1
|
0.99
|
0.99 – 0.99
|
<0.001
|
|
meanN1
|
1.01
|
1.00 – 1.01
|
<0.001
|
|
meanN400
|
1.00
|
1.00 – 1.01
|
0.001
|
|
Random Effects
|
|
σ2
|
3.29
|
|
τ00 pID
|
1.28
|
|
τ00 identifier
|
0.68
|
|
ICC
|
0.37
|
|
N identifier
|
20
|
|
N pID
|
91
|
|
Observations
|
287
|
|
Marginal R2 / Conditional R2
|
0.021 / 0.386
|
# emmeans(m.mw.mean, pairwise ~ cor_n400meanCPz_logSurprisalText, p.adjust = "fdr")
Does page-average surprisal predict MW
m.mw.sp <- glmer(MW ~ meanLogSurprisalText + (1+meanLogSurprisalText|pID), data=df_page %>% drop_na(MW), family = "binomial")
tab_model(m.mw.sp)
|
|
MW
|
|
Predictors
|
Odds Ratios
|
CI
|
p
|
|
(Intercept)
|
0.25
|
0.08 – 0.81
|
0.020
|
|
meanLogSurprisalText
|
1.96
|
0.61 – 6.24
|
0.256
|
|
Random Effects
|
|
σ2
|
3.29
|
|
τ00 pID
|
1.91
|
|
τ11 pID.meanLogSurprisalText
|
4.98
|
|
ρ01 pID
|
-0.93
|
|
ICC
|
0.36
|
|
N pID
|
91
|
|
Observations
|
287
|
|
Marginal R2 / Conditional R2
|
0.010 / 0.367
|